t1 <- Sys.time()
EH23a <- read.csv("../FigureSideo/EH23a.softmasked_nuccomp.csv")
EH23a$chrom_num <- sub(".+chr", "", EH23a$Id)
EH23a$chrom_num[ EH23a$chrom_num == "X" ] <- 10
EH23a$chrom_num <- as.numeric(EH23a$chrom_num)
EH23a[1:3, ]
EH23a[, 1:2]
EH23b <- read.csv("../FigureSideo/EH23b.softmasked_nuccomp.csv")
EH23b$chrom_num <- sub(".+chr", "", EH23b$Id)
EH23b$chrom_num[ EH23b$chrom_num == "X" ] <- 10
EH23b$chrom_num <- as.numeric(EH23b$chrom_num)
EH23b[1:3, ]
syri <- read.table("/media/knausb/E737-9B48/knausb/mm2_maps/EH23a_EH23b/EH23a_EH23brcsyri.out",
sep = "\t", na.strings = "-")
names(syri) <- c("ref_chrom", "ref_start", "ref_end", "ref_seq", "query_seq", "query_chrom", "query_start", "query_end", "unique_ID", "parent_ID", "Annotation_Type", "Copy_Status")
#syri$ref_start[is.na(as.numeric(syri$ref_start))]
syri$ref_start <- as.numeric(syri$ref_start)
syri$ref_end <- as.numeric(syri$ref_end)
syri$query_start <- as.numeric(syri$query_start)
syri$query_end <- as.numeric(syri$query_end)
syri[1:3, ]
sort(table(syri$Annotation_Type), decreasing = TRUE)
syn <- syri[syri[, 11] == "SYN", ]
syn[1:3, ]
nrow(syn)
last_win <- 1
#coalesce <- 5e4
#coalesce <- 1e5
coalesce <- 4e5
#coalesce <- 1e6
for( j in 2:nrow(syn) ){
if( syn$ref_start[j] - syn$ref_end[last_win] < coalesce &
syn$query_start[j] - syn$query_end[last_win] < coalesce &
syn$ref_chrom[j] == syn$ref_chrom[last_win] ){
syn$ref_end[ last_win ] <- syn$ref_end[j]
syn$query_end[ last_win ] <- syn$query_end[j]
#syn$ref_end[j] <- NA
syn$unique_ID[j] <- NA
} else {
last_win <- j
}
}
#syn <- syn[ !is.na( syn$ref_end ), ]
syn <- syn[ !is.na( syn$unique_ID ), ]
syn[1:3, ]
nrow(syn)
min(syn$ref_end - syn$ref_start)
hist(syn$ref_end - syn$ref_start)
sort(syn$ref_end - syn$ref_start, decreasing = T)[1:10]
syn[1:3, ]
inv <- syri[syri$Annotation_Type == "INV", ]
dup <- syri[syri$Annotation_Type == "DUP", ]
dup[1:3, ]
nrow(dup)
trans <- syri[syri$Annotation_Type == "TRANS", ]
trans[1:3, ]
nrow(trans)
# nrow(syn)
# x <- syn[1:1000, ]
# nsmooth <- 10
# min_len <- 1e5
#gribbon <- function(x, nsmooth = 50, min_len = 1e5){
gribbon <- function(
x,
nsmooth = 10,
min_len = 1e5,
coalesce = 0,
invert = FALSE){
x <- x[(x$ref_end - x$ref_start) >= min_len, ]
x <- x[(x$query_end - x$query_start) >= min_len, ]
if( nrow(x) <= 0 ){
return("No rows")
}
# if( coalesce > 0 ){
# last_win <- 1
# for( j in 2:nrow(x) ){
# if( x$ref_start[j] - x$ref_end[last_win] < coalesce &
# x$query_start[j] - x$query_end[last_win] < coalesce ){
# x$ref_end[ last_win ] <- x$ref_end[j]
# x$query_end[ last_win ] <- x$query_end[j]
# x$ref_end[j] <- NA
# } else {
# last_win <- j
# }
# }
# x <- x[ !is.na( x$ref_end ), ]
# }
if( invert ){
tmp <- x$query_start
x$query_start <- x$query_end
x$query_end <- tmp
}
x$ref_chrom_num <- sub(".+chr", "", x$ref_chrom)
x$ref_chrom_num[ x$ref_chrom_num == "X" ] <- 10
x$ref_chrom_num[ x$ref_chrom_num == "Y" ] <- 11
x$ref_chrom_num <- as.numeric(x$ref_chrom_num)
x$ref_chrom_num <- x$ref_chrom_num - 0.2
x$query_chrom_num <- sub(".+chr", "", x$query_chrom)
x$query_chrom_num[ x$query_chrom_num == "X" ] <- 10
x$query_chrom_num[ x$query_chrom_num == "Y" ] <- 11
x$query_chrom_num <- as.numeric(x$query_chrom_num)
x$query_chrom_num <- x$query_chrom_num + 0.2
# nrow(x)
my_x <- seq( from = -4, to = 4, length.out = nsmooth)
my_y <- 1/( 1 + exp(1)^-my_x)
my_x <- (my_x + 4)/8
# plot(my_x, my_y)
# my_x <- c(my_x, rev(my_x))
my_polys <- data.frame(
ID = rep(x$unique_ID, each = nsmooth * 2),
ref_chrom_num = rep(x$ref_chrom_num, each = nsmooth * 2),
# ref_start = rep(NA, each = nsmooth * 2),
# ref_end = rep(NA, each = nsmooth * 2),
query_chrom_num = rep(x$query_chrom_num, each = nsmooth * 2),
# query_start = rep(NA, each = nsmooth * 2),
# query_end = rep(NA, each = nsmooth * 2)
poly_xs = rep(NA, each = nsmooth * 2),
poly_ys = rep(NA, each = nsmooth * 2)
)
my_polys[1:3, ]
my_vs <- seq(1, nrow(my_polys) + 1, by = nsmooth * 2)
# for( i in seq(1, nrow(my_polys), by = nsmooth * 2) ){
for( i in 1:nrow(x) ){
my_index <- my_vs[i]:(my_vs[i+1] - 1)
xmins <- seq( x$ref_chrom_num[i], x$query_chrom_num[i], length.out = nsmooth)
# my_y * (x$query_end[i] - x$query_start[i]) + x$query_start[i]
ymins <- my_y * (x$query_start[i] - x$ref_start[i]) + x$ref_start[i]
ymaxs <- my_y * (x$query_end[i] - x$ref_end[i]) + x$ref_end[i]
my_polys$poly_xs[my_index] <- c(xmins, rev(xmins))
my_polys$poly_ys[my_index] <- c(ymins, rev(ymaxs))
#x$ref_start[ my_index ] <- seq(x$ref_chrom[i], x$query_chrom[i], length.out = nsmooth)
}
# my_polys[1:3, ]
return( my_polys )
}
#poly_coords <- gribbon(syn[1:1000, ])
#poly_coords <- gribbon(syn, min_len = 1e5)
#
nrow(syn)
## [1] 40
poly_coords <- gribbon(syn, min_len = 1e1, coalesce = 0)
#poly_coords <- gribbon(syn, min_len = 1e3)
# poly_coords <- gribbon(syn, nsmooth = 40, min_len = 1e3)
# poly_coords <- gribbon(syn, min_len = 1e1)
length(unique(poly_coords$ID))
## [1] 40
poly_coords_inv <- gribbon(inv, min_len = 1e4, invert = TRUE)
poly_coords_dup <- gribbon(dup, min_len = 1e4)
poly_coords_trans <- gribbon(trans, min_len = 1e4)
cmd <- "~/gits/hempy/bin/fast_win.py /media/knausb/Vining_lab/knausb/mm2_maps/EH23a_EH23b_v2/EH23b.rc458X.softmasked.fasta.gz --win_size 1000000"
#system(cmd)
# sampn <- "EH23b"
get_wins <- function( sampn ){
# sampd <- "../FigureSideo/EH23a.softmasked_wins.csv"
wins_dir <- "../FigureSideo/"
gffs_dir <- "/media/knausb/E737-9B48/releases/scaffolded/"
# sampn <- unlist(lapply(strsplit(sampd, split = "/"), function(x){x[length(x)]}))
# sampn <- unlist(lapply(strsplit(sampd, split = "//"), function(x){x[length(x)]}))
# sampn <- sub(".softmasked_wins.csv", "", sampn)
# sub(paste(sampn, ".softmasked_wins.csv", sep = ""), "", sampd)
if( sampn == "EH23b" ){
wins <- read.csv( "EH23b.rc458X.softmasked_wins.csv" )
wins$Id <- sub("EH23a", "EH23b", wins$Id)
} else {
wins <- read.csv( paste( wins_dir, sampn, ".softmasked_wins.csv", sep = "") )
}
# wins <- read.csv( paste( sampn, ".softmasked_wins.csv", sep = "") )
# wins <- read.csv( sampd )
# wins <- wins[ grep( paste(sampn, ".chr", sep = ""), wins$Id ), ]
wins$chrn <- sub(".+chr", "", wins$Id)
wins$chrn[ wins$chrn == "X" ] <- 10
wins$chrn[ wins$chrn == "Y" ] <- 11
wins$chrn <- as.numeric(wins$chrn)
#wins[1:3, ]
# Scale and center.
# Scaline of CG windows (chromosome width) is on a per chromosome basis.
# wins$CGs <- 0
wins$CGs <- wins$CG / wins$Win_length * 100
# wins$notCG <- (wins$Win_length / 1) - wins$CG
# wins$notCGs <- 0
for( j in unique(wins$Id) ){
wins$CGs[ wins$Id == j] <- wins$CGs[ wins$Id == j] - min(wins$CGs[ wins$Id == j], na.rm = TRUE)
wins$CGs[ wins$Id == j] <- wins$CGs[ wins$Id == j] / max(wins$CGs[ wins$Id == j], na.rm = TRUE)
#
# wins$CGs[ wins$Id == j] <- wins$CG[ wins$Id == j] - min(wins$CG[ wins$Id == j], na.rm = TRUE)
# wins$CGs[ wins$Id == j] <- wins$CGs[ wins$Id == j]/max(wins$CGs[ wins$Id == j], na.rm = TRUE)
#
# wins$notCGs[ wins$Id == j] <- wins$notCG[ wins$Id == j] - min(wins$notCG[ wins$Id == j], na.rm = TRUE)
# wins$notCGs[ wins$Id == j] <- wins$notCGs[ wins$Id == j]/max(wins$notCGs[ wins$Id == j], na.rm = TRUE)
}
wins$iCGs <- 1 - wins$CGs
#wins$ATs <- 1 - wins$CGs
#wins$ATs <- wins$Win_length/1e6 - wins$CGs
# genes <- read.table(
# paste(sampd, "/", sampn, ".primary_high_confidence.gff3.gz", sep = "" ),
# sep = "\t" )
genes <- read.table(
paste(gffs_dir, sampn, "/", sampn, ".primary_high_confidence.gff3.gz", sep = "" ),
sep = "\t" )
genes <- genes[genes[, 3] == "gene", ]
if( sampn == "EH23b" ){
# Column 4 is starts.
genes[ genes[, 1] == "EH23b.chr4", 4] <- EH23b$Length[ EH23b$Id == "EH23b.chr4" ] - genes[ genes[, 1] == "EH23b.chr4", 4]
genes[ genes[, 1] == "EH23b.chr4", 5] <- EH23b$Length[ EH23b$Id == "EH23b.chr4" ] - genes[ genes[, 1] == "EH23b.chr4", 5]
genes[ genes[, 1] == "EH23b.chr5", 4] <- EH23b$Length[ EH23b$Id == "EH23b.chr5" ] - genes[ genes[, 1] == "EH23b.chr5", 4]
genes[ genes[, 1] == "EH23b.chr5", 5] <- EH23b$Length[ EH23b$Id == "EH23b.chr5" ] - genes[ genes[, 1] == "EH23b.chr5", 5]
genes[ genes[, 1] == "EH23b.chr8", 4] <- EH23b$Length[ EH23b$Id == "EH23b.chr8" ] - genes[ genes[, 1] == "EH23b.chr8", 4]
genes[ genes[, 1] == "EH23b.chr8", 5] <- EH23b$Length[ EH23b$Id == "EH23b.chr8" ] - genes[ genes[, 1] == "EH23b.chr8", 5]
genes[ genes[, 1] == "EH23b.chrX", 4] <- EH23b$Length[ EH23b$Id == "EH23b.chrX" ] - genes[ genes[, 1] == "EH23b.chrX", 4]
genes[ genes[, 1] == "EH23b.chrX", 5] <- EH23b$Length[ EH23b$Id == "EH23b.chrX" ] - genes[ genes[, 1] == "EH23b.chrX", 5]
}
genes[1:3, ]
# Windowize
wins$gcnt <- 0
for(i in 1:nrow(wins)){
# if( sampn == "EH23b" ){
# wins$Id <- sub("EH23a", "EH23b", wins$Id)
# }
tmp <- genes[genes$V1 == wins$Id[i] & genes$V4 >= wins$Start[i] & genes$V5 < wins$End[i], ]
wins$gcnt[i] <- nrow(tmp)
}
# Scaling of gene count windows is on a per assembly basis.
wins$gcntsc <- wins$gcnt - min(wins$gcnt)
wins$gcntsc <- wins$gcntsc / max(wins$gcntsc)
my_index <- round( wins$gcntsc * 100 )
my_index[ my_index <= 0] <- 1
# Color ramp.
#wins$gcol <- heat.colors(n=100)[ my_index ]
#wins$gcol <- colorRampPalette(c("yellow", "orange", "red"))( 100 )[ my_index ]
#wins$gcol <- colorRampPalette(c("red", "orange", "yellow"))( 100 )[ my_index ]
#wins$gcol <- colorRampPalette(c("#0000FF", "#228B22", "#A0522D"))( 100 )[ my_index ]
#wins$gcol <- colorRampPalette(c("#87CEEB", "#3CB371", "#228B22", "#A0522D"))( 100 )[ my_index ]
#wins$gcol <- viridisLite::plasma(n = 100, alpha = 1, begin = 0.1, end = 1)[ my_index ]
wins$gcol <- viridisLite::magma(n = 100, alpha = 1, begin = 0.2, end = 1.00)[ my_index ]
return(wins)
}
EH23a_wins <- get_wins("EH23a")
EH23a_wins[1:3, ]
EH23b_wins <- get_wins("EH23b")
EH23b_wins[1:3, ]
Windowize SNPs
EH23a_wins[1:3, ]
## Id Win_number Start End Win_length A C G T
## 1 EH23a.chr1 0 0 999999 1000000 339510 159349 157789 343352
## 2 EH23a.chr1 1 1000000 1999999 1000000 346184 152100 152139 349577
## 3 EH23a.chr1 2 2000000 2999999 1000000 343278 155236 154692 346794
## CG CHG CHH chrn CGs iCGs gcnt gcntsc gcol
## 1 14003 19695 90544 1 0.07933312 0.9206669 156 1.0000000 #FCFDBFFF
## 2 13018 18441 88693 1 0.00000000 1.0000000 141 0.9038462 #FED799FF
## 3 14100 19606 89229 1 0.08714562 0.9128544 123 0.7884615 #FEAD77FF
snps <- syri[syri$Annotation_Type == "SNP", ]
nrow(snps)
## [1] 3660371
snps[1:3, ]
## ref_chrom ref_start ref_end ref_seq query_seq query_chrom query_start
## 5 EH23a.chr1 6376 6376 c t EH23a.chr1 43110
## 8 EH23a.chr1 6542 6542 A G EH23a.chr1 43276
## 11 EH23a.chr1 6779 6779 t c EH23a.chr1 43514
## query_end unique_ID parent_ID Annotation_Type Copy_Status
## 5 43110 SNP21930 SYN1 SNP <NA>
## 8 43276 SNP21933 SYN1 SNP <NA>
## 11 43514 SNP21936 SYN1 SNP <NA>
EH23a_wins$snps <- 0
for(i in 1:nrow(EH23a_wins)){
tmp <- snps[ snps$ref_chrom == EH23a_wins$Id[i] & snps$ref_start >= EH23a_wins$Start[i] & snps$ref_end <= EH23a_wins$End[i], ]
EH23a_wins$snps[i] <- nrow(tmp)
}
EH23a_wins$snpssc <- (EH23a_wins$snps - min(EH23a_wins$snps))/max(EH23a_wins$snps)
hist(EH23a_wins$snps)
hist(EH23a_wins$snps[ EH23a_wins$Id == "EH23a.chr4" ])
my_hist <- hist(EH23a_wins$snpssc * 9 + 1, plot = F)
#barplot(my_hist$density, space = 0, col = gray.colors(n=10))
#axis(side=1)
barplot(my_hist$density ~ my_hist$breaks[-10], space = 0, col = gray.colors(n=10))
sampn <- "EH23a"
ablstn <- read.csv(
paste("/media/knausb/Vining_lab/knausb/blast_projects/todd_rrna/blastn_uc/", sampn, "_blastn.csv", sep = ""),
header = FALSE)
colnames(ablstn) <- c("qseqid","qlen","sseqid","slen","qstart","qend",
"sstart","send","evalue","bitscore","score","length",
"pident","nident","mismatch","positive","gapopen",
"gaps","ppos","sstrand")
ablstn <- ablstn[grep("chr", ablstn$sseqid), ]
ablstn$chrn <- sub(".*chr", "", ablstn$sseqid)
ablstn$chrn[ablstn$chrn == "X"] <- 10
ablstn$chrn[ablstn$chrn == "Y"] <- 11
ablstn$chrn <- as.numeric(ablstn$chrn)
sampn <- "EH23b"
bblstn <- read.csv(
paste("/media/knausb/Vining_lab/knausb/blast_projects/todd_rrna/blastn_uc/", sampn, "_blastn.csv", sep = ""),
header = FALSE)
colnames(bblstn) <- c("qseqid","qlen","sseqid","slen","qstart","qend",
"sstart","send","evalue","bitscore","score","length",
"pident","nident","mismatch","positive","gapopen",
"gaps","ppos","sstrand")
bblstn <- bblstn[grep("chr", bblstn$sseqid), ]
bblstn$chrn <- sub(".*chr", "", bblstn$sseqid)
bblstn$chrn[bblstn$chrn == "X"] <- 10
bblstn$chrn[bblstn$chrn == "Y"] <- 11
bblstn$chrn <- as.numeric(bblstn$chrn)
my_chrom <- "EH23b.chr4"
bblstn$sstart[ bblstn$sseqid == my_chrom ] <- EH23b$Length[ EH23b$Id == my_chrom ] - bblstn$sstart[ bblstn$sseqid == my_chrom ]
bblstn$send[ bblstn$sseqid == my_chrom ] <- EH23b$Length[ EH23b$Id == my_chrom ] - bblstn$send[ bblstn$sseqid == my_chrom ]
my_chrom <- "EH23b.chr5"
bblstn$sstart[ bblstn$sseqid == my_chrom ] <- EH23b$Length[ EH23b$Id == my_chrom ] - bblstn$sstart[ bblstn$sseqid == my_chrom ]
bblstn$send[ bblstn$sseqid == my_chrom ] <- EH23b$Length[ EH23b$Id == my_chrom ] - bblstn$send[ bblstn$sseqid == my_chrom ]
my_chrom <- "EH23b.chr8"
bblstn$sstart[ bblstn$sseqid == my_chrom ] <- EH23b$Length[ EH23b$Id == my_chrom ] - bblstn$sstart[ bblstn$sseqid == my_chrom ]
bblstn$send[ bblstn$sseqid == my_chrom ] <- EH23b$Length[ EH23b$Id == my_chrom ] - bblstn$send[ bblstn$sseqid == my_chrom ]
my_chrom <- "EH23b.chrX"
bblstn$sstart[ bblstn$sseqid == my_chrom ] <- EH23b$Length[ EH23b$Id == my_chrom ] - bblstn$sstart[ bblstn$sseqid == my_chrom ]
bblstn$send[ bblstn$sseqid == my_chrom ] <- EH23b$Length[ EH23b$Id == my_chrom ] - bblstn$send[ bblstn$sseqid == my_chrom ]
sampn <- "EH23b"
library(ggplot2)
chrom_wid <- 0.05
p <- ggplot2::ggplot()
p <- p + theme_bw()
p <- p + ggplot2::geom_rect(
data = EH23a,
ggplot2::aes( xmin = chrom_num - chrom_wid - 0.2,
xmax = chrom_num + chrom_wid - 0.2,
ymin = 1, ymax = Length),
fill = "#DCDCDC",
color = "#000000"
)
p <- p + ggplot2::geom_rect(
data = EH23b,
ggplot2::aes( xmin = chrom_num - chrom_wid + 0.2,
xmax = chrom_num + chrom_wid + 0.2,
ymin = 1, ymax = Length),
fill = "#DCDCDC",
color = "#000000"
)
# EH23a_wins[1:3, ]
my_cols <- gray.colors(n=10, start = 0.3, end = 0.9)[round(EH23a_wins$snpssc * 9 + 1)]
#my_cols <- gray.colors(n=100, start = 0.3, end = 0.9)[round(EH23a_wins$snpssc * 99 + 1)]
#p <-
p + ggplot2::geom_rect(
data = EH23a_wins,
ggplot2::aes( xmin = chrn - 0.5,
xmax = chrn - 0.25,
ymin = Start, ymax = End),
# fill = "#DCDCDC",
fill = my_cols,
# color = "#000000"
)
# Theme
p <- p + ggplot2::scale_x_continuous(
breaks = EH23a$chrom_num,
# limits = c(0.6, 10.4),
# limits = c(0.5, 10.5),
# labels = EH23a$Id
labels = sub("a.chr", ".chr", EH23a$Id)
)
p <- p + scale_y_continuous(
breaks = seq( 0, 120e6, by = 10e6),
labels = seq( 0, 120, by = 10)
)
# p <- p + ggplot2::theme_bw() +
p <- p + ggplot2::theme(
# panel.grid.minor.x = ggplot2::element_blank(),
panel.grid.minor.x = ggplot2::element_line( linewidth = 0.4, color = "#C0C0C0", linetype = 3 ),
axis.text.x = element_text(angle = 60, hjust = 1),
axis.title.x=element_blank(),
panel.grid.major.y = ggplot2::element_line( linewidth = 0.4, color = "#C0C0C0", linetype = 1 ),
panel.grid.minor.y = ggplot2::element_line( linewidth = 0.4, color = "#C0C0C0", linetype = 3 )
)
p <- p + ggplot2::ylab("Position (Mbp)")
p <- p + ggtitle( "EH23" )
p
#p <- p + theme(legend.position='none')
p <- p + geom_polygon(
data = poly_coords,
aes(x = poly_xs, y = poly_ys, fill = ID, group = ID), #, fill = col),
alpha = 2/5,
show.legend = FALSE,
#color = NA,
# color = "#80808022",
# color = "#000000",
color = "#696969",
# color = "#808080",
# color = "#A9A9A9",
# color = "#C0C0C0",
linewidth = 0.4,
# fill = ID
# fill = "#C0C0C044"
fill = "#808080"
)
p
p <- p + geom_polygon(
data = poly_coords_inv,
aes(x = poly_xs, y = poly_ys, fill = ID, group = ID),
alpha = 0.9,
show.legend = FALSE,
#color = NA,
# color = "#80808022",
#color = "#000000",
# linewidth = 0.1,
# fill = ID
# fill = "#C0C0C044"
fill = "#FFA500"
# fill = "#808080"
)
p
# p + geom_polygon(
# data = poly_coords_dup,
# aes(x = poly_xs, y = poly_ys, fill = ID, group = ID),
# alpha = 0.9,
# show.legend = FALSE,
# fill = "#1E90FF"
# )
# p + geom_polygon(
# data = poly_coords_trans,
# aes(x = poly_xs, y = poly_ys, fill = ID, group = ID),
# alpha = 0.9,
# show.legend = FALSE,
# fill = "#1E90FF"
# )
thinw <- 0.28
p <- p + ggplot2::geom_rect(
data = EH23a_wins,
ggplot2::aes(
xmin = chrn - iCGs * thinw - 0.2,
#xmax = chrn + iCGs * thinw,
xmax = chrn - 0.2,
ymin = Start,
ymax = End),
fill = EH23a_wins$gcol,
color = NA
)
p <- p + ggplot2::geom_rect(
data = EH23b_wins,
ggplot2::aes(
#xmin = chrn - iCGs * thinw - 0.2,
xmin = chrn + 0.2,
xmax = chrn + iCGs * thinw + 0.2,
#xmax = chrn - 0.2,
ymin = Start,
ymax = End),
fill = EH23b_wins$gcol,
color = NA
)
p
#p + xlim(3.5, 4.5) #+ ylim(1e6, 2e6)
p + xlim(5.5, 8.5) + ylim(10e6, 40e6)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Warning: Removed 10 rows containing missing values (`geom_rect()`).
## Removed 10 rows containing missing values (`geom_rect()`).
## Warning: Removed 654 rows containing missing values (`geom_rect()`).
## Warning: Removed 653 rows containing missing values (`geom_rect()`).
#pEH23 <- p
#save(pEH23, file = "ideo.RData")
#load("ideo.RData")
#pEH23
# Cannabinoids
my_rows <- grep("AB2|LY|NC_044376.1:c427", ablstn$qseqid)
blstw <- 0.48
p <- p + ggplot2::geom_rect(
data = ablstn[my_rows, ],
ggplot2::aes(
xmin = chrn - blstw,
xmax = chrn + 0 - 0.14,
ymin = sstart,
ymax = send),
fill = "#228B22",
color = '#228B22'
)
my_rows <- grep("AB2|LY|NC_044376.1:c427", bblstn$qseqid)
blstw <- 0.48
p <- p + ggplot2::geom_rect(
data = bblstn[my_rows, ],
ggplot2::aes(
xmin = chrn + 0.14,
xmax = chrn + blstw,
ymin = sstart,
ymax = send),
fill = "#228B22",
color = '#228B22'
)
# 5S
my_rows <- grep("5S_", ablstn$qseqid)
blstw <- 0.48
p <- p + ggplot2::geom_rect(
data = ablstn[my_rows, ],
ggplot2::aes(
xmin = chrn - blstw,
xmax = chrn - 0.1,
ymin = sstart,
ymax = send),
fill = "#000000",
color = '#000000'
)
my_rows <- grep("5S_", bblstn$qseqid)
blstw <- 0.48
p <- p + ggplot2::geom_rect(
data = bblstn[my_rows, ],
ggplot2::aes(
xmin = chrn + 0.1,
xmax = chrn + blstw,
ymin = sstart,
ymax = send),
fill = "#000000",
color = '#000000'
)
# 45S
my_rows <- grep("45s_|26s_|5.8s_|18s_", ablstn$qseqid)
blstw <- 0.48
p <- p + ggplot2::geom_rect(
data = ablstn[my_rows, ],
ggplot2::aes(
xmin = chrn - blstw,
xmax = chrn - 0.1,
ymin = sstart,
ymax = send),
fill = "#B22222",
linewidth = 2,
# linewidth = 4,
color = '#B22222'
)
my_rows <- grep("45s_|26s_|5.8s_|18s_", bblstn$qseqid)
blstw <- 0.48
p <- p + ggplot2::geom_rect(
data = bblstn[my_rows, ],
ggplot2::aes(
xmin = chrn + 0.1,
xmax = chrn + blstw,
ymin = sstart,
ymax = send),
fill = "#B22222",
linewidth = 2,
# linewidth = 4,
color = '#B22222'
)
# Centromeres, subtelomeric repeats
my_rows <- grep("AH3Ma_CS-237_satelite|CsatSD_centromere_237bp|CsatSD_centromere_370bp", ablstn$qseqid)
# blstw <- 0.48
p <- p + ggplot2::geom_rect(
data = ablstn[my_rows, ],
ggplot2::aes(
xmin = chrn - 0.5,
xmax = chrn - 0.2,
ymin = sstart,
ymax = send),
fill = "#1E90FF",
color = '#1E90FF'
)
my_rows <- grep("AH3Ma_CS-237_satelite|CsatSD_centromere_237bp|CsatSD_centromere_370bp", bblstn$qseqid)
p <- p + ggplot2::geom_rect(
data = bblstn[my_rows, ],
ggplot2::aes(
xmin = chrn + 0.2,
xmax = chrn + 0.5,
ymin = sstart,
ymax = send),
fill = "#1E90FF",
color = '#1E90FF'
)
pEH23 <- p
pEH23
library(vcfR)
##
## ***** *** vcfR *** *****
## This is vcfR 1.14.0
## browseVignettes('vcfR') # Documentation
## citation('vcfR') # Citation
## ***** ***** ***** *****
#vcf <- read.vcfR("F2s.variants5.freebayes.filt.3.vcf.gz", nrows = )
#vcf <- read.vcfR("F2s_0NA.vcf.gz", nrows = )
vcf <- read.vcfR("/media/knausb/Vining_lab/private_data/salk/VCFs/F2s_0NA.vcf.gz", nrows = )
t10 <- Sys.time()
vcf
## ***** Object of Class vcfR *****
## 271 samples
## 10 CHROMs
## 35,074 variants
## Object size: 462.4 Mb
## 0 percent missing data
## ***** ***** *****
#vcf <- vcf[is.biallelic(vcf), ]
#vcf
vcf@gt <- vcf@gt[, grep("Parent_23", colnames(vcf@gt), invert = TRUE)]
#colnames(vcf@gt)
vcf
## ***** Object of Class vcfR *****
## 270 samples
## 10 CHROMs
## 35,074 variants
## Object size: 458.4 Mb
## 0 percent missing data
## ***** ***** *****
vcf@fix[1:3, 1:8]
## CHROM POS ID REF ALT QUAL FILTER INFO
## [1,] "chr_1e" "62350" NA "G" "A" "246.804" "PASS" NA
## [2,] "chr_1e" "98473" NA "C" "T" "2122.13" "PASS" NA
## [3,] "chr_1e" "204226" NA "C" "G" "61311.8" "PASS" NA
#
nucs2 <- read.csv("~/gits/CannabisPangenome/FigureSideo/EH23b.softmasked_nuccomp.csv")
#nucs <- read.csv("~/gits/CannabisPangenome/FigureSideo/EH23b.softmasked_nuccomp.csv")
nucs <- read.csv("/media/knausb/Vining_lab/private_data/salk/ERBxHO40_23_v2_hap_ERB/nuccomp/ERBxHO40_23.combined.v2.hic.hap_ERB_nuccomp.csv")
cbind(nucs2$Id, nucs$Id)
## [,1] [,2]
## [1,] "EH23b.chr1" "chr_1e"
## [2,] "EH23b.chr5" "chr_2e"
## [3,] "EH23b.chr2" "chr_3e"
## [4,] "EH23b.chr3" "chr_4e"
## [5,] "EH23b.chr4" "chr_5e"
## [6,] "EH23b.chr7" "chr_6e"
## [7,] "EH23b.chr8" "chr_7e"
## [8,] "EH23b.chr9" "chr_8e"
## [9,] "EH23b.chr6" "chr_9e"
## [10,] "EH23b.chrX" "chr_Xe"
vcf@fix[, "CHROM"][ vcf@fix[, "CHROM"] == "chr_1e" ] <- "chr1"
vcf@fix[, "CHROM"][ vcf@fix[, "CHROM"] == "chr_2e" ] <- "chr5"
vcf@fix[, "CHROM"][ vcf@fix[, "CHROM"] == "chr_3e" ] <- "chr2"
vcf@fix[, "CHROM"][ vcf@fix[, "CHROM"] == "chr_4e" ] <- "chr3"
vcf@fix[, "CHROM"][ vcf@fix[, "CHROM"] == "chr_5e" ] <- "chr4"
vcf@fix[, "CHROM"][ vcf@fix[, "CHROM"] == "chr_6e" ] <- "chr7"
vcf@fix[, "CHROM"][ vcf@fix[, "CHROM"] == "chr_7e" ] <- "chr8"
vcf@fix[, "CHROM"][ vcf@fix[, "CHROM"] == "chr_8e" ] <- "chr9"
vcf@fix[, "CHROM"][ vcf@fix[, "CHROM"] == "chr_9e" ] <- "chr6"
vcf@fix[, "CHROM"][ vcf@fix[, "CHROM"] == "chr_Xe" ] <- "chrX"
#vcf@fix[, "CHROM"] <- sub("EH23b.", "", vcf@fix[, "CHROM"])
unique(vcf@fix[, "CHROM"])
## [1] "chr1" "chr5" "chr2" "chr3" "chr4" "chr7" "chr8" "chr9" "chr6" "chrX"
nucs$Id[ nucs$Id == "chr_1e" ] <- "chr1"
nucs$Id[ nucs$Id == "chr_2e" ] <- "chr5"
nucs$Id[ nucs$Id == "chr_3e" ] <- "chr2"
nucs$Id[ nucs$Id == "chr_4e" ] <- "chr3"
nucs$Id[ nucs$Id == "chr_5e" ] <- "chr4"
nucs$Id[ nucs$Id == "chr_6e" ] <- "chr7"
nucs$Id[ nucs$Id == "chr_7e" ] <- "chr8"
nucs$Id[ nucs$Id == "chr_8e" ] <- "chr9"
nucs$Id[ nucs$Id == "chr_9e" ] <- "chr6"
nucs$Id[ nucs$Id == "chr_Xe" ] <- "chrX"
nucs$chrom_num <- sub("^chr", "", nucs$Id)
nucs$chrom_num[ nucs$chrom_num == "X" ] <- 10
nucs$chrom_num <- as.numeric(nucs$chrom_num)
nucs <- nucs[sort.int(nucs$chrom_num, decreasing = FALSE, index.return = TRUE)$ix, ]
nucs$Id
## [1] "chr1" "chr2" "chr3" "chr4" "chr5" "chr6" "chr7" "chr8" "chr9" "chrX"
vcf2 <- vcf[ vcf@fix[, "CHROM"] == "chr1", ]
vcf2 <- rbind2(vcf2, vcf[ vcf@fix[, "CHROM"] == "chr2", ])
vcf2 <- rbind2(vcf2, vcf[ vcf@fix[, "CHROM"] == "chr3", ])
vcf2 <- rbind2(vcf2, vcf[ vcf@fix[, "CHROM"] == "chr4", ])
vcf2 <- rbind2(vcf2, vcf[ vcf@fix[, "CHROM"] == "chr5", ])
vcf2 <- rbind2(vcf2, vcf[ vcf@fix[, "CHROM"] == "chr6", ])
vcf2 <- rbind2(vcf2, vcf[ vcf@fix[, "CHROM"] == "chr7", ])
vcf2 <- rbind2(vcf2, vcf[ vcf@fix[, "CHROM"] == "chr8", ])
vcf2 <- rbind2(vcf2, vcf[ vcf@fix[, "CHROM"] == "chr9", ])
vcf2 <- rbind2(vcf2, vcf[ vcf@fix[, "CHROM"] == "chrX", ])
vcf <- vcf2
rm(vcf2)
table(vcf@fix[, "CHROM"])
##
## chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chrX
## 2925 4855 5700 3175 3218 4685 2879 2309 1969 3359
chrom_name <- "chr4"
vcf@fix[ , "POS"][ vcf@fix[, "CHROM"] == chrom_name ] <-
EH23b$Length[ EH23b$Id == "EH23b.chr4" ] -
as.numeric(vcf@fix[ , "POS"][ vcf@fix[, "CHROM"] == chrom_name ])
chrom_name <- "chr5"
vcf@fix[ , "POS"][ vcf@fix[, "CHROM"] == chrom_name ] <-
EH23b$Length[ EH23b$Id == "EH23b.chr5" ] -
as.numeric(vcf@fix[ , "POS"][ vcf@fix[, "CHROM"] == chrom_name ])
chrom_name <- "chr8"
vcf@fix[ , "POS"][ vcf@fix[, "CHROM"] == chrom_name ] <-
EH23b$Length[ EH23b$Id == "EH23b.chr8" ] -
as.numeric(vcf@fix[ , "POS"][ vcf@fix[, "CHROM"] == chrom_name ])
chrom_name <- "chrX"
vcf@fix[ , "POS"][ vcf@fix[, "CHROM"] == chrom_name ] <-
EH23b$Length[ EH23b$Id == "EH23b.chrX" ] -
as.numeric(vcf@fix[ , "POS"][ vcf@fix[, "CHROM"] == chrom_name ])
nucs[1:3, ]
## Id Length a A c C g G t T w W s S m M k K r R
## 1 chr1 63722626 0 21262072 0 10570136 0 10558163 0 21323755 0 0 0 0 0 0 0 0 0 0
## 3 chr2 80157000 0 26408690 0 13765866 0 13770753 0 26207691 0 0 0 0 0 0 0 0 0 0
## 4 chr3 83805000 0 27490094 0 14453690 0 14424197 0 27432019 0 0 0 0 0 0 0 0 0 0
## y Y n N chrom_num
## 1 0 0 0 8500 1
## 3 0 0 0 4000 2
## 4 0 0 0 5000 3
nucs$Id
## [1] "chr1" "chr2" "chr3" "chr4" "chr5" "chr6" "chr7" "chr8" "chr9" "chrX"
nucs$ends <- 0
nucs$ends[1] <- nucs$Length[1]
for(i in 2:nrow(nucs)){
nucs$ends[i] <- nucs$ends[i-1] + nucs$Length[i]
}
nucs$mids <- nucs$ends - nucs$Length/2
nucs[1:3, ]
## Id Length a A c C g G t T w W s S m M k K r R
## 1 chr1 63722626 0 21262072 0 10570136 0 10558163 0 21323755 0 0 0 0 0 0 0 0 0 0
## 3 chr2 80157000 0 26408690 0 13765866 0 13770753 0 26207691 0 0 0 0 0 0 0 0 0 0
## 4 chr3 83805000 0 27490094 0 14453690 0 14424197 0 27432019 0 0 0 0 0 0 0 0 0 0
## y Y n N chrom_num ends mids
## 1 0 0 0 8500 1 63722626 31861313
## 3 0 0 0 4000 2 143879626 103801126
## 4 0 0 0 5000 3 227684626 185782126
my_pos <- rePOS(vcf, lens = nucs)
my_popsum <- gt.to.popsum(vcf)
#my_popsum <- as.matrix(my_popsum)
my_popsum[1:3, ]
## n Allele_counts He Ne
## 1 270 509,31 0.1082236 1.121357
## 2 270 412,128 0.3617010 1.566664
## 3 270 263,277 0.4996639 1.998657
my_popsum <- cbind(vcf@fix[ , 1:2 ], my_popsum)
my_popsum[1:3, ]
## CHROM POS n Allele_counts He Ne
## 1 chr1 62350 270 509,31 0.1082236 1.121357
## 2 chr1 98473 270 412,128 0.3617010 1.566664
## 3 chr1 204226 270 263,277 0.4996639 1.998657
class(my_popsum)
## [1] "data.frame"
gt <- extract.gt(vcf)
t(apply(gt[1:4, ], MARGIN = 1, function(x){ table(x, useNA = 'always') }))
## 0/0 0/1 1/1 <NA>
## chr1_62350 240 29 1 0
## chr1_98473 145 122 3 0
## chr1_204226 65 133 72 0
## chr1_222275 63 144 63 0
nrow(gt) * 4
## [1] 140296
#, levels = c("0/0", "0/1", "1/1"))
#tmp <- apply(gt, MARGIN = 1, function(x){ table(x, useNA = 'always') })
tmp <- apply(gt, MARGIN = 1, function(x){
table(factor(x, levels = c("0/0", "0/1", "1/1", NA), exclude = NULL))
})
length(tmp)
## [1] 140296
range(lapply(tmp, length))
## [1] 1 1
tmp <- matrix(tmp, ncol = 4, byrow = TRUE)
colnames(tmp) <- c("count0.0", "count0.1", "count1.1", "countNA")
tmp[1:5, ]
## count0.0 count0.1 count1.1 countNA
## [1,] 240 29 1 0
## [2,] 145 122 3 0
## [3,] 65 133 72 0
## [4,] 63 144 63 0
## [5,] 61 141 68 0
my_popsum <- cbind(my_popsum, tmp)
my_popsum[1:3, ]
## CHROM POS n Allele_counts He Ne count0.0 count0.1 count1.1
## 1 chr1 62350 270 509,31 0.1082236 1.121357 240 29 1
## 2 chr1 98473 270 412,128 0.3617010 1.566664 145 122 3
## 3 chr1 204226 270 263,277 0.4996639 1.998657 65 133 72
## countNA
## 1 0
## 2 0
## 3 0
tmp <- matrix(unlist(strsplit(my_popsum$Allele_counts, split = ",")), ncol = 2, byrow = TRUE)
colnames(tmp) <- c("ERBcount", "HO40count")
my_popsum <- cbind(my_popsum, tmp)
my_popsum$ERBcount <- as.numeric(my_popsum$ERBcount)
my_popsum$HO40count <- as.numeric(my_popsum$HO40count)
my_popsum[1:3, ]
## CHROM POS n Allele_counts He Ne count0.0 count0.1 count1.1
## 1 chr1 62350 270 509,31 0.1082236 1.121357 240 29 1
## 2 chr1 98473 270 412,128 0.3617010 1.566664 145 122 3
## 3 chr1 204226 270 263,277 0.4996639 1.998657 65 133 72
## countNA ERBcount HO40count
## 1 0 509 31
## 2 0 412 128
## 3 0 263 277
class(my_popsum)
## [1] "data.frame"
my_popsum$ERBfreq <- my_popsum$ERBcount/(my_popsum[, "n"] * 2)
my_popsum$HO40freq <- my_popsum$HO40count/(my_popsum[, "n"] * 2)
my_popsum[1:3, ]
## CHROM POS n Allele_counts He Ne count0.0 count0.1 count1.1
## 1 chr1 62350 270 509,31 0.1082236 1.121357 240 29 1
## 2 chr1 98473 270 412,128 0.3617010 1.566664 145 122 3
## 3 chr1 204226 270 263,277 0.4996639 1.998657 65 133 72
## countNA ERBcount HO40count ERBfreq HO40freq
## 1 0 509 31 0.9425926 0.05740741
## 2 0 412 128 0.7629630 0.23703704
## 3 0 263 277 0.4870370 0.51296296
my_popsum$He2 <- 2 * my_popsum$ERBfreq * my_popsum$HO40freq
my_popsum$POS2 <- my_pos
my_popsum[1:3, ]
## CHROM POS n Allele_counts He Ne count0.0 count0.1 count1.1
## 1 chr1 62350 270 509,31 0.1082236 1.121357 240 29 1
## 2 chr1 98473 270 412,128 0.3617010 1.566664 145 122 3
## 3 chr1 204226 270 263,277 0.4996639 1.998657 65 133 72
## countNA ERBcount HO40count ERBfreq HO40freq He2 POS2
## 1 0 509 31 0.9425926 0.05740741 0.1082236 62350
## 2 0 412 128 0.7629630 0.23703704 0.3617010 98473
## 3 0 263 277 0.4870370 0.51296296 0.4996639 204226
# write.table(x = my_popsum, file = "allele_freqs.csv", sep = ",",
# row.names = FALSE, col.names = TRUE)
Hs <- 2 * my_popsum$ERBfreq * my_popsum$HO40freq
Ho <- my_popsum$count0.1 / my_popsum$n
my_popsum$Fis <- (Hs - Ho)/Hs
my_popsum[1:3, ]
## CHROM POS n Allele_counts He Ne count0.0 count0.1 count1.1
## 1 chr1 62350 270 509,31 0.1082236 1.121357 240 29 1
## 2 chr1 98473 270 412,128 0.3617010 1.566664 145 122 3
## 3 chr1 204226 270 263,277 0.4996639 1.998657 65 133 72
## countNA ERBcount HO40count ERBfreq HO40freq He2 POS2 Fis
## 1 0 509 31 0.9425926 0.05740741 0.1082236 62350 0.007541669
## 2 0 412 128 0.7629630 0.23703704 0.3617010 98473 -0.249241505
## 3 0 263 277 0.4870370 0.51296296 0.4996639 204226 0.014152174
#my_popsum <- my_popsum[seq(1, nrow(my_popsum), by = 2), ]
# hist(my_popsum$ERBfreq)
# hist(my_popsum$HO40freq)
#my_popsum <- my_popsum[my_popsum$ERBfreq >= 0.4, ]
#my_popsum <- my_popsum[my_popsum$ERBfreq <= 0.6, ]
#
#my_popsum <- my_popsum[my_popsum$HO40freq >= 0.4, ]
#my_popsum <- my_popsum[my_popsum$HO40freq <= 0.6, ]
my_min <- 0.3
my_max <- 0.7
my_popsum <- my_popsum[my_popsum$ERBfreq >= my_min, ]
my_popsum <- my_popsum[my_popsum$ERBfreq <= my_max, ]
#
my_popsum <- my_popsum[my_popsum$HO40freq >= my_min, ]
my_popsum <- my_popsum[my_popsum$HO40freq <= my_max, ]
my_min <- -0.5
my_max <- 0.5
my_popsum <- my_popsum[my_popsum$Fis >= my_min, ]
my_popsum <- my_popsum[my_popsum$Fis <= my_max, ]
par( mfrow = c(1, 2) )
#
hist(my_popsum$HO40freq, xlab = "", ylab = "Count", main = "HO40 allele frequency")
#
hist(my_popsum$ERBfreq, xlab = "", ylab = "Count", main = "ERB allele frequency")
par( mfrow = c(1, 1) )
barplot( table( my_popsum$CHROM ), las = 3 )
title( ylab = "Variant count" )
title( main = paste("Total Variant count:", format( nrow(my_popsum), big.mark = ",") ) )
my_popsum[1:3, ]
## CHROM POS n Allele_counts He Ne count0.0 count0.1 count1.1
## 3 chr1 204226 270 263,277 0.4996639 1.998657 65 133 72
## 4 chr1 222275 270 270,270 0.5000000 2.000000 63 144 63
## 5 chr1 249752 270 263,277 0.4996639 1.998657 61 141 68
## countNA ERBcount HO40count ERBfreq HO40freq He2 POS2 Fis
## 3 0 263 277 0.487037 0.512963 0.4996639 204226 0.01415217
## 4 0 270 270 0.500000 0.500000 0.5000000 222275 -0.06666667
## 5 0 263 277 0.487037 0.512963 0.4996639 249752 -0.04514694
nrow(my_popsum)
## [1] 22567
vcf@fix[1:3, 1:8]
## CHROM POS ID REF ALT QUAL FILTER INFO
## [1,] "chr1" "62350" NA "G" "A" "246.804" "PASS" NA
## [2,] "chr1" "98473" NA "C" "T" "2122.13" "PASS" NA
## [3,] "chr1" "204226" NA "C" "G" "61311.8" "PASS" NA
nrow(vcf)
## [1] 35074
tmp1 <- paste(getCHROM(vcf), getPOS(vcf), sep = "_")
tmp2 <- paste( my_popsum$CHROM, my_popsum$POS, sep = "_")
vcf <- vcf[tmp1 %in% tmp2, ]
vcf
## ***** Object of Class vcfR *****
## 270 samples
## 10 CHROMs
## 22,567 variants
## Object size: 260.4 Mb
## 0 percent missing data
## ***** ***** *****
# write.vcf(vcf, file = "tmsalk.vcf.gz")
afreq <- my_popsum
EH23a <- afreq[, c("CHROM", "POS", "n", "Allele_counts", "He", "Ne", "count0.0",
"count0.1", "count1.1", "countNA", "ERBcount", "HO40count", "ERBfreq",
"HO40freq", "He2", "POS2")]
EH23a$Frequency <- EH23a$ERBfreq
EH23a$facet1 <- "EH23a"
EH23b <- afreq[, c("CHROM", "POS", "n", "Allele_counts", "He", "Ne", "count0.0",
"count0.1", "count1.1", "countNA", "ERBcount", "HO40count", "ERBfreq",
"HO40freq", "He2", "POS2")]
EH23b$Frequency <- EH23a$HO40freq
EH23b$facet1 <- "EH23b"
Het <- afreq[, c("CHROM", "POS", "n", "Allele_counts", "He", "Ne", "count0.0",
"count0.1", "count1.1", "countNA", "ERBcount", "HO40count", "ERBfreq",
"HO40freq", "He2", "POS2")]
Het$Frequency <- EH23a$He2
Het$facet1 <- "Het"
Het[1:3, ]
## CHROM POS n Allele_counts He Ne count0.0 count0.1 count1.1
## 3 chr1 204226 270 263,277 0.4996639 1.998657 65 133 72
## 4 chr1 222275 270 270,270 0.5000000 2.000000 63 144 63
## 5 chr1 249752 270 263,277 0.4996639 1.998657 61 141 68
## countNA ERBcount HO40count ERBfreq HO40freq He2 POS2 Frequency
## 3 0 263 277 0.487037 0.512963 0.4996639 204226 0.4996639
## 4 0 270 270 0.500000 0.500000 0.5000000 222275 0.5000000
## 5 0 263 277 0.487037 0.512963 0.4996639 249752 0.4996639
## facet1
## 3 Het
## 4 Het
## 5 Het
Fis_df <- afreq[, c("CHROM", "POS", "n", "Allele_counts", "He", "Ne", "count0.0",
"count0.1", "count1.1", "countNA", "ERBcount", "HO40count", "ERBfreq",
"HO40freq", "He2", "POS2")]
#Fis_df$Frequency <- Fis
Fis_df$Frequency <- afreq$Fis
Fis_df$facet1 <- "Fis"
#
afreq <- rbind(EH23a, EH23b, Het, Fis_df)
#afreq <- rbind(EH23a, EH23b, Het)
afreq$facet2 <- afreq$facet1
afreq$facet2 <- sub("EH23[ab]", "EH23", afreq$facet2)
afreq$facet2[ afreq$facet2 == "EH23" ] <- "Allele"
afreq$facet2[ afreq$facet2 == "Het" ] <- "Heterozygosity"
#table(afreq$facet1)
table(afreq$facet2)
##
## Allele Fis Heterozygosity
## 45134 22567 22567
afreq[1:3, ]
## CHROM POS n Allele_counts He Ne count0.0 count0.1 count1.1
## 3 chr1 204226 270 263,277 0.4996639 1.998657 65 133 72
## 4 chr1 222275 270 270,270 0.5000000 2.000000 63 144 63
## 5 chr1 249752 270 263,277 0.4996639 1.998657 61 141 68
## countNA ERBcount HO40count ERBfreq HO40freq He2 POS2 Frequency
## 3 0 263 277 0.487037 0.512963 0.4996639 204226 0.487037
## 4 0 270 270 0.500000 0.500000 0.5000000 222275 0.500000
## 5 0 263 277 0.487037 0.512963 0.4996639 249752 0.487037
## facet1 facet2
## 3 EH23a Allele
## 4 EH23a Allele
## 5 EH23a Allele
library(ggplot2)
table(afreq$facet2)
##
## Allele Fis Heterozygosity
## 45134 22567 22567
#
afreq <- afreq[afreq$facet2 != "Heterozygosity", ]
#my_pal <- viridisLite::inferno(n = 10, begin = 0.2, end = 0.9, alpha = 0.01)
my_pal <- viridisLite::inferno(n = 10, begin = 0.2, end = 0.9, alpha = 0.04)
set.seed(99)
my_pal <- my_pal[sample(1:length(my_pal), size = length(my_pal))]
#palette(my_pal)
#as.numeric(as.factor(afreq$CHROM))
#afreq$col2 <- viridisLite::magma( n = 10, alpha = 0.8, begin = 0, end = 1 )[as.numeric(as.factor(afreq$CHROM))]
# range(afreq$POS2)
#afreq$facet2 <- factor(afreq$facet2, levels = c("Allele", "F[IS]"))
p <- ggplot( data = afreq,
mapping = aes( x = POS2,
#y = ERBfreq,
#y = freq,
y = Frequency,
color = CHROM )
#color = col2)
)
p <- p + geom_point( shape = 20, show.legend = FALSE )
p <- p + theme_bw()
p <- p + theme( axis.title.x=element_blank(),
axis.text.x = element_text(angle = 60, hjust = 1)
# plot.margin = unit(c(0.1,0.1,1,0.1), "cm")
)
#p <- p + theme(axis.title.x = element_blank(),
# axis.text.x = element_text( angle = 60, hjust = 1))
# p + scale_x_discrete(breaks = EH23a$mids2, labels = EH23a$Id)
# p + scale_x_discrete(breaks = EH23a$mids2)
p <- p + ggplot2::scale_x_continuous(
breaks = nucs$mids,
expand = expansion(mult = 0.01, add = 0.0),
# expand = expansion(mult = 0.02, add = 0.0),
labels = sub("a.chr", ".chr", nucs$Id)
)
p <- p + theme( panel.grid.major.x = element_line(color = "#A9A9A9", linewidth = 0.0 ) )
p <- p + theme( panel.grid.minor.x = element_line(color = "#A9A9A9", linewidth = 0.0 ) )
# p <- p + geom_vline( xintercept = nuccomp$Length2, linetype = 1, color = "#808080", linewidth = 0.4 )
# p <- p + geom_vline( xintercept = 0, linetype = 1, color = "#808080", linewidth = 0.4 )
#p <- p + geom_vline( xintercept = nuccomp$Length2, linetype = 5, color = "#808080", linewidth = 0.4 )
p <- p + geom_vline( xintercept = nucs$ends, linetype = 5, color = "#808080", linewidth = 0.4 )
p <- p + geom_vline( xintercept = 0, linetype = 5, color = "#808080", linewidth = 0.4 )
#p <- p + geom_hline( yintercept = 0.5, linetype = 1, color = "#000000", linewidth = 1 )
#p <- p + geom_hline( yintercept = c(0, 0.5), linetype = 1, color = "#000000", linewidth = 1 )
#p + scale_color_manual( values = viridisLite::magma( n = 10, alpha = 0.01, begin = 0.2, end = 0.9 ) )
#p <- p + xlim(100, sum(nuccomp$Length))
#p <- p + ylim(0.3, 0.7)
p <- p + scale_color_manual( values = my_pal )
#
#p <-
p + facet_grid(facet1 ~ ., scales = "free_y", labeller = label_parsed)
#p <-
#ahplot + ylab("")
# ahplot <- ahplot + scale_y_continuous( breaks = seq(-1.0, 1.0, by = 0.2),
# minor_breaks = seq(-1, 1, by = 0.1) )
ahplot <- p + facet_grid(facet2 ~ . , scales = "free_y", labeller = label_parsed)
#ahplot <- ahplot + theme( panel.grid.major.y = element_line(color = "#696969", linewidth = 0.6 ) )
ahplot <- ahplot + theme( panel.grid.major.y = element_line(color = "#A9A9A9", linewidth = 0.4 ) )
#ahplot + theme( panel.grid.major.y = element_line(color = "#C0C0C0", linewidth = 0.2, linetype = 1 ) )
# myPlots[[i]] <- myPlots[[i]] + theme( panel.grid.minor.y=element_line(color = "#C0C0C0", size=0.2) )
ahplot <- ahplot + ylab(NULL)
#p
ahplot
library("ggpubr")
ggarrange(
plotlist = list(pEH23, ahplot),
labels = c("A", "B"),
ncol = 1, nrow = 2,
widths = 1, heights = c(2, 1))
Figure 1. Genomic architecture of the Cannabis sativa genome.
# ggsave( filename = "EH23_chroms_freqs.tiff",
# device = "tiff",
# width = 6.5, height = 9, units = "in",
# dpi = 300,
# compression = "lzw")
t99 <- Sys.time()
t99 - t1
## Time difference of 4.173247 mins